################################################################################
### Script file to reproduce all the empirical results from the Yildirim, Chang and Williams POQ article
### This includes the manuscript and appendix
################################################################################

# Load libraries
library(tidyverse)
library(dplyr)
library(foreign)
library(RColorBrewer)
library(ggpubr)
library(ggstats)
library(AER)
library(MASS)
library(lme4)
library(fastDummies)
library(Amelia)
library(mitools)
library(stats)
library(ggplot2)

#install.packages("remotes")
#remotes::install_github("iqss/clarify")
library(clarify)

# load the data
z.data <- read.dta("PAC.dta")

# Select only those variables that I care about for the MI procedure
z.data <- z.data[, c('studyid', 'year', 'word_count', 'differ_s', 'differ_g', 'quasi_count', 'approve', 'ideology', 'know_perc', 'age', 'male', 'white', 'married', 'unemployed', 'class', 'income_quartile', 'college', 'inpartisan', 'outpartisan', 'interest_politics', 'attention_campaign', 'watch_news', 'discuss', 'entropy', 'entropy_st', 'interest', 'attention', 'race_black', 'race_ai', 'race_ap', 'engage')]

# Generate dummy variables for the year
z.data <- fastDummies::dummy_cols(z.data, select_columns = "year")

# Set the theme
theme_set(theme_minimal() + theme(legend.title = element_blank(), legend.position = "bottom"))


#######################################################################################
### Table 1: Descriptive statistics of agenda capacity and its origins
#######################################################################################

# Continuous variables
cont.vlist <- list(6, 5, 4, 9, 10, 22, 24, 25, 26, 27, 31)

for(v in cont.vlist){
  print(colnames(z.data[v]))
  print(summary(z.data[[v]]))

  print("Standard Deviation:")  
  print(sd(z.data[[v]], na.rm = TRUE))
  
  perc = mean(is.na(z.data[v])) * 100
  print("Percentage of missing values:")
  print(perc)
}

# Binary and ordinal variables
bin.vlist <- list(6, 4, 7, 11, 12, 16, 17, 18, 22, 23, 26, 27, 28, 29, 30)

for(v in bin.vlist){
  print(colnames(z.data[v]))
  print(table(z.data[v]))
  print(prop.table(table(z.data[v])))
  
  perc = mean(is.na(z.data[v])) * 100
  print("Percentage of missing values")
  print(perc)
}

#######################################################################################
### Empirical challenge #1: unit heterogeneity in agenda capacity across survey year
#######################################################################################

### Nominal agenda capacity
mn.qc.y <- aggregate(z.data$quasi_count, list(z.data$year), FUN = mean, na.rm = TRUE)
mn.qc.y

### Thematic agenda capacity
mn.ds.y <- aggregate(z.data$differ_s, list(z.data$year), FUN = mean, na.rm = TRUE)
mn.ds.y


#######################################################################################
# Empirical challenge #2: Missingness
#######################################################################################

### Percentage missing by survey
z.data %>% 
  group_by(year) %>% 
  summarise(m_know_perc = mean(is.na(know_perc)) * 100,
            m_interest_politics = mean(is.na(interest_politics)) * 100,
            m_discuss = mean(is.na(discuss)) * 100,
            m_differ_s = mean(is.na(differ_s))* 100,
            m_quasi_count = mean(is.na(quasi_count)) * 100
  )

### Listwise deletion: number of observations
qc.ld <- glm(quasi_count ~ college + know_perc  
             + interest + attention + watch_news + discuss
             + inpartisan + approve 
             + entropy + entropy_st               
             + male + race_black + race_ai + race_ap + age + income_quartile, 
             family = poisson, data = z.data)
nobs(qc.ld)

### Evidence from a linear probability model that missing observations are more likely to occur in low cognitive and politically engaged respondents
z.data['miss'] <- ifelse(is.na(z.data$differ_s),1,0)
miss.lpm <- lm(miss ~ college + know_perc  
               + interest + attention + watch_news + discuss
               + inpartisan + approve 
               + entropy + entropy_st               
               + male + race_black + race_ai + race_ap + age + income_quartile, 
               data = z.data)
summary(miss.lpm)

### Little's Missing Completely at Random (MCAR) test (found in the MCAR Test.do file)



#######################################################################################
### Origins of nominal and thematic agenda capacity
#######################################################################################

set.seed(12345)
z.data <- z.data[, c('studyid', 'year', 'word_count', 'differ_s', 'differ_g', 'quasi_count', 'approve', 'ideology', 'know_perc', 'age', 'male', 'white', 'married', 'unemployed', 'class', 'income_quartile', 'college', 'inpartisan', 'outpartisan', 'interest_politics', 'attention_campaign', 'watch_news', 'discuss', 'entropy', 'entropy_st', 'interest', 'attention', 'race_black', 'race_ai', 'race_ap', 'year_1984', 'year_1986', 'year_1988', 'year_1990', 'year_1992', 'year_1996', 'year_1998', 'year_2000', 'year_2008', 'year_2012', 'year_2016', 'year_2020')]


### Multiple imputation (Amelia)
# First, set up some bounds for the count variables: word count (3), differ_s (4), differ_g (5), and quasi_count (6)
bds <- matrix(c(3, 4, 5, 6, 0, 0, 0, 1, 810, 26, 20, 46), nrow = 4, ncol = 3)
bds

a.out <- amelia(z.data, m = 5, idvars = c('year', 'year_1984', 'year_1986', 'year_1988', 'year_1990', 'year_1992', 'year_1996', 'year_1998', 'year_2000', 'year_2008', 'year_2012', 'year_2016', 'year_2020'), 
                cs = 'studyid',
                noms = c('approve', 'inpartisan', 'outpartisan', 'ideology', 'male', 'white', 'race_black', 'race_ai', 'race_ap', 'married', 'unemployed', 'class', 'income_quartile', 'discuss'), 
                ords = c('differ_s', 'differ_g', 'quasi_count', 'watch_news', 'interest', 'attention'),
                bounds = bds, max.resample = 1000)

#######################################################################################
### Nominal agenda capacity
#######################################################################################

fits.qc <- with(a.out, glm.nb(quasi_count ~ college + know_perc
                              + interest + attention + watch_news + discuss
                              + inpartisan + approve
                              + entropy + entropy_st 
                              + male + race_black + race_ai + race_ap + age + income_quartile
                              + year_1986 + year_1988 + year_1990 + year_1992 + year_1996 + year_1998 + year_2000 + year_2008 + year_2012 + year_2016 + year_2020))

# Simulate n coefficients for each of the m MI datasets
s.qc <- misim(fits.qc, n = 200)
summary(s.qc$coefs)

# Summarize the coefficients
beta.m1 <- data.frame(matrix(vector(), 0, 8, dimnames = list(c(), c('Variable', 'b', 'lo90', 'hi90', 'lo95', 'hi95', 'p', 'model'))), stringsAsFactors = FALSE)

for (i in 1:30) {
  qc.1 <- quantile(s.qc$sim.coefs[,i], probs = c(0.5, 0.05, 0.95, 0.025, 0.975))

  beta.m1[i,1] <- colnames(s.qc$coefs)[i]
  beta.m1[i,2:6] <- qc.1[1:5]
  
  if (qc.1[1] > 0) {
    beta.m1[i,7] <- 1 - mean(s.qc$sim.coefs[,i] > 0)
  }
  else {
    beta.m1[i,7] <- 1 - mean(s.qc$sim.coefs[,i] < 0)
  }  
}


### Expected counts (and first differences) at representative values
d.m1 <- data.frame(matrix(vector(), 0, 8, dimnames = list(c(), c('Variable', 'delta', 'lo90', 'hi90', 'lo95', 'hi95', 'p', 'model'))), stringsAsFactors = FALSE)


# College
exp.qc <- sim_setx(s.qc, x = data.frame(college = 0), x1 = data.frame(college = 1), verbose = FALSE)

i <- 1
d.m1[i,1] <- 'college'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Political Knowledge
exp.qc <- sim_setx(s.qc, x = data.frame(know_perc = 0), x1 = data.frame(know_perc = 1), verbose = FALSE)

i <- 2
d.m1[i,1] <- 'know_perc'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Interest in politics
exp.qc <- sim_setx(s.qc, x = data.frame(interest = 1), x1 = data.frame(interest = 3), verbose = FALSE)

i <- 3
d.m1[i,1] <- 'interest'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Attention to campaign
exp.qc <- sim_setx(s.qc, x = data.frame(attention = 1), x1 = data.frame(attention = 3), verbose = FALSE)

i <- 4
d.m1[i,1] <- 'attention'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Watch news? 
exp.qc <- sim_setx(s.qc, x = data.frame(watch_news = 4), x1 = data.frame(watch_news = 7), verbose = FALSE)

i <- 5
d.m1[i,1] <- 'watch_news'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Discuss politics?
exp.qc <- sim_setx(s.qc, x = data.frame(discuss = 0), x1 = data.frame(discuss = 1), verbose = FALSE)

i <- 6
d.m1[i,1] <- 'discuss'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# In-partisan
exp.qc <- sim_setx(s.qc, x = data.frame(inpartisan = 0), x1 = data.frame(inpartisan = 1), verbose = FALSE)

i <- 7
d.m1[i,1] <- 'inpartisan'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Approval
exp.qc <- sim_setx(s.qc, x = data.frame(approve = 'Disapprove'), x1 = data.frame(approve = 'Approve'), verbose = FALSE)

i <- 8
d.m1[i,1] <- 'approve'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Entropy: mean to mean + SD
exp.qc <- sim_setx(s.qc, x = data.frame(entropy = 2.71), x1 = data.frame(entropy = 2.94), verbose = FALSE)

i <- 9
d.m1[i,1] <- 'entropy'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Entropy_st: mean to mean + SD
exp.qc <- sim_setx(s.qc, x = data.frame(entropy_st = 2.40), x1 = data.frame(entropy_st = 2.74), verbose = FALSE)

i <- 10
d.m1[i,1] <- 'entropy_st'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Male
exp.qc <- sim_setx(s.qc, x = data.frame(male = "Female"), x1 = data.frame(male = "Male"), verbose = FALSE)

i <- 11
d.m1[i,1] <- 'male'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Black
exp.qc <- sim_setx(s.qc, x = data.frame(race_black = "Non-Black"), x1 = data.frame(race_black = "Black"), verbose = FALSE)

i <- 12
d.m1[i,1] <- 'black'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Native American
exp.qc <- sim_setx(s.qc, x = data.frame(race_ai = "Non-NA"), x1 = data.frame(race_ai = "Native American"), verbose = FALSE)

i <- 13
d.m1[i,1] <- 'native'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Asian/Pacific Islander
exp.qc <- sim_setx(s.qc, x = data.frame(race_ap = "Non-Asian"), x1 = data.frame(race_ap = "Asian/PI"), verbose = FALSE)

i <- 14
d.m1[i,1] <- 'asian'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Age
exp.qc <- sim_setx(s.qc, x = data.frame(age = 48), x1 = data.frame(age = 66), verbose = FALSE)

i <- 15
d.m1[i,1] <- 'age'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Income Quartile: 1st to 2nd
exp.qc <- sim_setx(s.qc, x = data.frame(income_quartile = "Lower 25%"), x1 = data.frame(income_quartile = "Second 25%"), verbose = FALSE)

i <- 16
d.m1[i,1] <- 'income_quartile: 1 to 2'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Income Quartile: 1st to 3rd
exp.qc <- sim_setx(s.qc, x = data.frame(income_quartile = "Lower 25%"), x1 = data.frame(income_quartile = "Third 25%"), verbose = FALSE)

i <- 17
d.m1[i,1] <- 'income_quartile: 1 to 3'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


# Income Quartile: 1st to 4th
exp.qc <- sim_setx(s.qc, x = data.frame(income_quartile = "Lower 25%"), x1 = data.frame(income_quartile = "Highest 25%"), verbose = FALSE)

i <- 18
d.m1[i,1] <- 'income_quartile: 1 to 4'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]
d.m1[i,7] <- ci.90[4]


### Quantities of Interest for various scenarios

# Cognitive resources: college and political knowledge
exp.qc <- sim_setx(s.qc, x = data.frame(college = 0, know_perc = 0), x1 = data.frame(college = 1, know_perc = 1), verbose = FALSE)

qi.m1.1 <- data.frame(matrix(vector(), 0, 9, dimnames = list(c(), c('type', 'scenario', 'qi.name', 'qi', 'lo90', 'hi90', 'lo95', 'hi95', 'xval'))), stringsAsFactors = FALSE)
qi.m1.1[1:3,1] <- 'Cognitive'
qi.m1.1[1:2,2] <- c('Low', 'High')
qi.m1.1[3,2] <- 'diff'
qi.m1.1[1:2,3] <- 'exp'
qi.m1.1[3,3] <- 'diff'

ci.90 <- summary(exp.qc[1:3], level = 0.90)
ci.95 <- summary(exp.qc[1:3], level = 0.95)

qi.m1.1[1:3,4:6] <- ci.90[1:3,1:3]
qi.m1.1[1:3,7:8] <- ci.95[1:3,2:3]

# Political engagement: Political interest, pay attention to campaign, watch the news, discuss politics
exp.qc <- sim_setx(s.qc, x = data.frame(interest = 1, attention = 1, discuss = 0, watch_news = 0), x1 = data.frame(interest = 3, attention = 3, discuss = 1, watch_news = 7), verbose = FALSE)

qi.m1.2 <- data.frame(matrix(vector(), 0, 9, dimnames = list(c(), c('type', 'scenario', 'qi.name', 'qi', 'lo90', 'hi90', 'lo95', 'hi95', 'xval'))), stringsAsFactors = FALSE)
qi.m1.2[1:3,1] <- 'Engage'
qi.m1.2[1:2,2] <- c('Low', 'High')
qi.m1.2[3,2] <- 'diff'
qi.m1.2[1:2,3] <- 'exp'
qi.m1.2[3,3] <- 'diff'

ci.90 <- summary(exp.qc[1:3], level = 0.90)
ci.95 <- summary(exp.qc[1:3], level = 0.95)

qi.m1.2[1:3,4:6] <- ci.90[1:3,1:3]
qi.m1.2[1:3,7:8] <- ci.95[1:3,2:3]



# Partisan tendencies: approval and in-partisan
exp.qc <- sim_setx(s.qc, x = data.frame(approval = 'Disapprove', inpartisan = 0), x1 = data.frame(approval = 'Approve', inpartisan = 1), verbose = FALSE)

qi.m1.3 <- data.frame(matrix(vector(), 0, 9, dimnames = list(c(), c('type', 'scenario', 'qi.name', 'qi', 'lo90', 'hi90', 'lo95', 'hi95', 'xval'))), stringsAsFactors = FALSE)
qi.m1.3[1:3,1] <- 'Support'
qi.m1.3[1:2,2] <- c('Low', 'High')
qi.m1.3[3,2] <- 'diff'
qi.m1.3[1:2,3] <- 'exp'
qi.m1.3[3,3] <- 'diff'

ci.90 <- summary(exp.qc[1:3], level = 0.90)
ci.95 <- summary(exp.qc[1:3], level = 0.95)

qi.m1.3[1:3,4:6] <- ci.90[1:3,1:3]
qi.m1.3[1:3,7:8] <- ci.95[1:3,2:3]


# Diversity of political cues: entropy (2.3 to 2.9 in increments of 0.1)
qi.m1.4 <- data.frame(matrix(vector(), 0, 9, dimnames = list(c(), c('type', 'scenario', 'qi.name', 'qi', 'lo90', 'hi90', 'lo95', 'hi95', 'xval'))), stringsAsFactors = FALSE)

qi.m1.4[1:7,1] <- 'environment'
qi.m1.4[1:7,2] <- 'entropy'
qi.m1.4[1:7,3] <- 'exp'

x.start <- 1
x.sequence <- c(2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9)
for (xval in x.sequence) {
  exp.qc <- sim_setx(s.qc, x = data.frame(entropy = xval), verbose = FALSE)

  ci.90 <- summary(exp.qc[1], level = 0.90)
  ci.95 <- summary(exp.qc[1], level = 0.95)
    
  qi.m1.4[x.start,4:6] <- ci.90[1,1:3]
  qi.m1.4[x.start,7:8] <- ci.95[1,2:3]
  qi.m1.4[x.start,9] <- xval
  
  x.start <- x.start + 1
}



#######################################################################################
### Thematic agenda capacity
#######################################################################################
fits.sc <- with(a.out, glm.nb(differ_s ~ college + know_perc
                              + interest + attention + watch_news + discuss
                              + inpartisan + approve
                              + entropy + entropy_st 
                              + male + race_black + race_ai + race_ap + age + income_quartile 
                              + year_1986 + year_1988 + year_1990 + year_1992 + year_1996 + year_1998 + year_2000 + year_2008 + year_2012 + year_2016 + year_2020))

# Simulate n coefficients for each of the m MI datasets
s.sc <- misim(fits.sc, n = 200)
summary(s.sc$coefs)

# Summarize the coefficients
beta.m2 <- data.frame(matrix(vector(), 0, 8, dimnames = list(c(), c('Variable', 'b', 'lo90', 'hi90', 'lo95', 'hi95', 'p', 'model'))), stringsAsFactors = FALSE)

for (i in 1:30) {
  sc.1 <- quantile(s.sc$sim.coefs[,i], probs = c(0.5, 0.05, 0.95, 0.025, 0.975))
  
  beta.m2[i,1] <- colnames(s.sc$coefs)[i]
  beta.m2[i,2:6] <- sc.1[1:5]
  
  if (sc.1[1] > 0) {
    beta.m2[i,7] <- 1 - mean(s.sc$sim.coefs[,i] > 0)
  }
  else {
    beta.m2[i,7] <- 1 - mean(s.sc$sim.coefs[,i] < 0)
  }  
}



### Expected counts (and first differences) at representative values
d.m2 <- data.frame(matrix(vector(), 0, 8, dimnames = list(c(), c('Variable', 'delta', 'lo90', 'hi90', 'lo95', 'hi95', 'p', 'model'))), stringsAsFactors = FALSE)

# College
exp.qc <- sim_setx(s.sc, x = data.frame(college = 0), x1 = data.frame(college = 1), verbose = FALSE)

i <- 1
d.m2[i,1] <- 'college'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Political Knowledge
exp.qc <- sim_setx(s.sc, x = data.frame(know_perc = 0), x1 = data.frame(know_perc = 1), verbose = FALSE)

i <- 2
d.m2[i,1] <- 'know_perc'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Interest in politics
exp.qc <- sim_setx(s.sc, x = data.frame(interest = 1), x1 = data.frame(interest = 3), verbose = FALSE)

i <- 3
d.m2[i,1] <- 'interest'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Attention to campaign
exp.qc <- sim_setx(s.sc, x = data.frame(attention = 1), x1 = data.frame(attention = 3), verbose = FALSE)

i <- 4
d.m2[i,1] <- 'attention'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Watch news? 
exp.qc <- sim_setx(s.sc, x = data.frame(watch_news = 4), x1 = data.frame(watch_news = 7), verbose = FALSE)

i <- 5
d.m2[i,1] <- 'watch_news'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Discuss politics?
exp.qc <- sim_setx(s.sc, x = data.frame(discuss = 0), x1 = data.frame(discuss = 1), verbose = FALSE)

i <- 6
d.m2[i,1] <- 'discuss'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# In-partisan
exp.qc <- sim_setx(s.sc, x = data.frame(inpartisan = 0), x1 = data.frame(inpartisan = 1), verbose = FALSE)

i <- 7
d.m2[i,1] <- 'inpartisan'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Approval
exp.qc <- sim_setx(s.sc, x = data.frame(approve = 'Disapprove'), x1 = data.frame(approve = 'Approve'), verbose = FALSE)

i <- 8
d.m2[i,1] <- 'approve'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Entropy: mean to mean + SD
exp.qc <- sim_setx(s.sc, x = data.frame(entropy = 2.71), x1 = data.frame(entropy = 2.94), verbose = FALSE)

i <- 9
d.m2[i,1] <- 'entropy'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Entropy_st: mean to mean + SD
exp.qc <- sim_setx(s.sc, x = data.frame(entropy_st = 2.40), x1 = data.frame(entropy_st = 2.74), verbose = FALSE)

i <- 10
d.m2[i,1] <- 'entropy_st'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Male
exp.qc <- sim_setx(s.sc, x = data.frame(male = "Female"), x1 = data.frame(male = "Male"), verbose = FALSE)

i <- 11
d.m2[i,1] <- 'male'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Black
exp.qc <- sim_setx(s.sc, x = data.frame(race_black = "Non-Black"), x1 = data.frame(race_black = "Black"), verbose = FALSE)

i <- 12
d.m2[i,1] <- 'black'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Native American
exp.qc <- sim_setx(s.sc, x = data.frame(race_ai = "Non-NA"), x1 = data.frame(race_ai = "Native American"), verbose = FALSE)

i <- 13
d.m2[i,1] <- 'native'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Asian/Pacific Islander
exp.qc <- sim_setx(s.sc, x = data.frame(race_ap = "Non-Asian"), x1 = data.frame(race_ap = "Asian/PI"), verbose = FALSE)

i <- 14
d.m2[i,1] <- 'asian'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Age
exp.qc <- sim_setx(s.sc, x = data.frame(age = 48), x1 = data.frame(age = 66), verbose = FALSE)

i <- 15
d.m2[i,1] <- 'age'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Income Quartile: 1st to 2nd
exp.qc <- sim_setx(s.sc, x = data.frame(income_quartile = "Lower 25%"), x1 = data.frame(income_quartile = "Second 25%"), verbose = FALSE)

i <- 16
d.m2[i,1] <- 'income_quartile: 1 to 2'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Income Quartile: 1st to 3rd
exp.qc <- sim_setx(s.sc, x = data.frame(income_quartile = "Lower 25%"), x1 = data.frame(income_quartile = "Third 25%"), verbose = FALSE)

i <- 17
d.m2[i,1] <- 'income_quartile: 1 to 3'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


# Income Quartile: 1st to 4th
exp.qc <- sim_setx(s.sc, x = data.frame(income_quartile = "Lower 25%"), x1 = data.frame(income_quartile = "Highest 25%"), verbose = FALSE)

i <- 18
d.m2[i,1] <- 'income_quartile: 1 to 4'

ci.90 <- summary(exp.qc[3], level = 0.90, null = 0)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]
d.m2[i,7] <- ci.90[4]


### Quantities of Interest for various scenarios

# Cognitive resources: college and political knowledge
exp.qc <- sim_setx(s.sc, x = data.frame(college = 0, know_perc = 0), x1 = data.frame(college = 1, know_perc = 1), verbose = FALSE)

qi.m2.1 <- data.frame(matrix(vector(), 0, 9, dimnames = list(c(), c('type', 'scenario', 'qi.name', 'qi', 'lo90', 'hi90', 'lo95', 'hi95', 'xval'))), stringsAsFactors = FALSE)
qi.m2.1[1:3,1] <- 'Cognitive'
qi.m2.1[1:2,2] <- c('Low', 'High')
qi.m2.1[3,2] <- 'diff'
qi.m2.1[1:2,3] <- 'exp'
qi.m2.1[3,3] <- 'diff'

ci.90 <- summary(exp.qc[1:3], level = 0.90)
ci.95 <- summary(exp.qc[1:3], level = 0.95)

qi.m2.1[1:3,4:6] <- ci.90[1:3,1:3]
qi.m2.1[1:3,7:8] <- ci.95[1:3,2:3]

# Political engagement: Political interest, pay attention to campaign, watch the news, discuss politics
exp.qc <- sim_setx(s.sc, x = data.frame(interest = 1, attention = 1, discuss = 0, watch_news = 0), x1 = data.frame(interest = 3, attention = 3, discuss = 1, watch_news = 7), verbose = FALSE)

qi.m2.2 <- data.frame(matrix(vector(), 0, 9, dimnames = list(c(), c('type', 'scenario', 'qi.name', 'qi', 'lo90', 'hi90', 'lo95', 'hi95', 'xval'))), stringsAsFactors = FALSE)
qi.m2.2[1:3,1] <- 'Engage'
qi.m2.2[1:2,2] <- c('Low', 'High')
qi.m2.2[3,2] <- 'diff'
qi.m2.2[1:2,3] <- 'exp'
qi.m2.2[3,3] <- 'diff'

ci.90 <- summary(exp.qc[1:3], level = 0.90)
ci.95 <- summary(exp.qc[1:3], level = 0.95)

qi.m2.2[1:3,4:6] <- ci.90[1:3,1:3]
qi.m2.2[1:3,7:8] <- ci.95[1:3,2:3]



# Partisan tendencies: approval and in-partisan
exp.qc <- sim_setx(s.sc, x = data.frame(approval = 'Disapprove', inpartisan = 0), x1 = data.frame(approval = 'Approve', inpartisan = 1), verbose = FALSE)

qi.m2.3 <- data.frame(matrix(vector(), 0, 9, dimnames = list(c(), c('type', 'scenario', 'qi.name', 'qi', 'lo90', 'hi90', 'lo95', 'hi95', 'xval'))), stringsAsFactors = FALSE)
qi.m2.3[1:3,1] <- 'Support'
qi.m2.3[1:2,2] <- c('Low', 'High')
qi.m2.3[3,2] <- 'diff'
qi.m2.3[1:2,3] <- 'exp'
qi.m2.3[3,3] <- 'diff'

ci.90 <- summary(exp.qc[1:3], level = 0.90)
ci.95 <- summary(exp.qc[1:3], level = 0.95)

qi.m2.3[1:3,4:6] <- ci.90[1:3,1:3]
qi.m2.3[1:3,7:8] <- ci.95[1:3,2:3]


# Diversity of political cues: entropy (2.3 to 2.9 in increments of 0.1)
qi.m2.4 <- data.frame(matrix(vector(), 0, 9, dimnames = list(c(), c('type', 'scenario', 'qi.name', 'qi', 'lo90', 'hi90', 'lo95', 'hi95', 'xval'))), stringsAsFactors = FALSE)

qi.m2.4[1:7,1] <- 'environment'
qi.m2.4[1:7,2] <- 'entropy'
qi.m2.4[1:7,3] <- 'exp'

x.start <- 1
x.sequence <- c(2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9)
for (xval in x.sequence) {
  exp.qc <- sim_setx(s.sc, x = data.frame(entropy = xval), verbose = FALSE)
  
  ci.90 <- summary(exp.qc[1], level = 0.90)
  ci.95 <- summary(exp.qc[1], level = 0.95)
  
  qi.m2.4[x.start,4:6] <- ci.90[1,1:3]
  qi.m2.4[x.start,7:8] <- ci.95[1,2:3]
  qi.m2.4[x.start,9] <- xval
  
  x.start <- x.start + 1
}




################################################################################
### Table 2: Origins of nominal and thematic agenda capacity
################################################################################

### Export the stuff for a results table
beta.m1[8] <- 'Nominal'
beta.m2[8] <- 'Thematic'
beta <- rbind(beta.m1, beta.m2)
beta

d.m1[8] <- 'Nominal'
d.m2[8] <- 'Thematic'
delta <- rbind(d.m1, d.m2)
delta

qi.m1 <- rbind(qi.m1.1, qi.m1.2, qi.m1.3, qi.m1.4)
qi.m1[10] <- 'Nominal'

qi.m2 <- rbind(qi.m2.1, qi.m2.2, qi.m2.3, qi.m2.4)
qi.m2[10] <- 'Thematic'

qi <- rbind(qi.m1, qi.m2)


################################################################################
### Figure 1: Expected nominal and thematic agenda capacity across low and high scenarios of cognitive resources, political engagement, partisan support, and the sample range of diversity of political cues
################################################################################

# Scenarios
qi.qc <- ggplot(subset(qi, type != 'environment' & qi.name == 'exp' & V10 == 'Nominal'), aes(group = scenario)) + 
  geom_linerange(aes(x = type, ymin = lo95, ymax = hi95, color = scenario), size = 1) + 
  geom_point(aes(x = type, y = qi, color = scenario), size = 1, shape = 21, fill = 'white') + 
  theme(legend.position = 'none') + 
  xlab("") + ylab("Nominal Agenda Capacity")

qi.ds <- ggplot(subset(qi, type != 'environment' & qi.name == 'exp' & V10 == 'Thematic'), aes(group = scenario)) + 
  geom_linerange(aes(x = type, ymin = lo95, ymax = hi95, color = scenario), size = 1) + 
  geom_point(aes(x = type, y = qi, color = scenario), size = 1, shape = 21, fill = 'white') + 
  theme(legend.position = 'bottom') +   
  xlab("") + ylab("Thematic Agenda Capacity")

# Diversity of political cues
qi.qc.m2 <- ggplot(subset(qi, type == 'environment' & scenario == 'entropy' & V10 == 'Nominal')) + 
  geom_line(aes(x = xval, y = lo95), linetype = 'dashed') + 
  geom_line(aes(x = xval, y = hi95), linetype = 'dashed') +   
  geom_line(aes(x = xval, y = qi)) + 
  xlab("") + ylab("")

qi.ds.m2 <- ggplot(subset(qi, type == 'environment' & scenario == 'entropy' & V10 == 'Thematic')) + 
  geom_line(aes(x = xval, y = lo95), linetype = 'dashed') + 
  geom_line(aes(x = xval, y = hi95), linetype = 'dashed') +   
  geom_line(aes(x = xval, y = qi)) + 
  xlab("Entropy") + ylab("")

ggarrange(qi.qc, qi.qc.m2, qi.ds, qi.ds.m2, ncol = 2, nrow = 2)

################################################################################
### Substantive effects
################################################################################

sd <- sd(a.out$imputations[[1]]$quasi_count)
share.sd.cog <- 100 * (qi$qi[qi$type == 'Cognitive'& qi$scenario == 'diff' & qi$qi.name == 'diff' & qi$V10 == 'Nominal']) / sd
share.sd.pol <- 100 * (qi$qi[qi$type == 'Engage'& qi$scenario == 'diff' & qi$qi.name == 'diff' & qi$V10 == 'Nominal']) / sd
cat("SD of N.A.C is ", round(sd, 2), "; Differences in Cognitive Resources is ", round(share.sd.cog), "%; Differences in Political Engagmenet is ", round(share.sd.pol), "% \n")

F <- ecdf(a.out$imputations[[1]]$quasi_count)
start <- round(100 * F(3.56))
end <- round(100 * F(5.05))
cat("Going from low to high scenario of Political Engagement is the same as a jump from the", start, "th to the", end, "th percentiles \n")

cat("Difference in scenarios in Support =", round(qi$qi[qi$type == 'Support'& qi$scenario == 'diff' & qi$qi.name == 'diff' & qi$V10 == 'Nominal'], 3), "95% confidence intervals: [", round(qi$lo95[qi$type == 'Support'& qi$scenario == 'diff' & qi$qi.name == 'diff' & qi$V10 == 'Nominal'], 3), ",", round(qi$hi95[qi$type == 'Support'& qi$scenario == 'diff' & qi$qi.name == 'diff' & qi$V10 == 'Nominal'], 3), "]")


#######################################################################################
### Manuscript Footnote: test for equidispersion
#######################################################################################

### Nominal agenda capacity
qc <- glm(quasi_count ~ interest + attention + discuss + watch_news
          + college + know_perc + income_quartile
          + race_black + race_ai + race_ap +
            + age + male
          + approve + inpartisan
          + entropy + entropy_st 
          + year_1986 + year_1988 + year_1990 + year_1992 + year_1996 + year_1998 + year_2000 + year_2008 + year_2012 + year_2016 + year_2020
          , family = poisson, data = a.out$imputations[[1]])
dispersiontest(qc, trafo=1)

### Thematic agenda capacity
s.mod <- glm(differ_s ~ interest + attention + discuss + watch_news
             + college + know_perc + income_quartile
             + race_black + race_ai + race_ap +
               + age + male
             + approve + inpartisan
             + entropy + entropy_st 
             + year_1986 + year_1988 + year_1990 + year_1992 + year_1996 + year_1998 + year_2000 + year_2008 + year_2012 + year_2016 + year_2020
             , family = poisson, data = a.out$imputations[[1]])
dispersiontest(s.mod, trafo=1)



#######################################################################################
# Appendix Figure 1: Kernel density plots of nominal agenda capacity across political knowledge (left) and thematic agenda capacity across political interest (right)
#######################################################################################

# Political knowledge
z.data['perc2'] <- ifelse(z.data$know_perc == 0, '0% Correct', 
                          ifelse(z.data$know_perc == 1, '100% Correct', NA))

h.qc <- ggplot(subset(z.data[!is.na(z.data$perc2), ], quasi_count <= 20), aes(x = quasi_count, fill = perc2)) + 
  geom_histogram(color = 'black', alpha = 0.5, position = 'identity', bins = 21) + 
  scale_fill_manual(values = c("0% Correct" = "#000000", "100% Correct" = "#D9D9D9")) +
  ylab('') + xlab('Count of Mentions')

# Political interest
z.data['int2'] <- ifelse(z.data$interest == 1, 'No Interest', 
                         ifelse(z.data$interest == 3, 'Interest', NA))

h.ds <- ggplot(subset(z.data[!is.na(z.data$int2), ], differ_s <= 16), aes(x = differ_s, fill = int2)) + 
  geom_histogram(color = 'black', alpha = 0.5, position = 'identity', bins = 17) + 
  scale_fill_manual(values = c("No Interest" = "#000000", "Interest" = "#D9D9D9")) +  
  ylab('') + xlab('Count of Different Mentions')

# Combine the two plots
ggarrange(h.qc, h.ds, labels = c('Nominal', 'Thematic'), ncol = 2, nrow = 1)



#######################################################################################
# Appendix Figure 2: Heat map of missing values across the respondent-level independent variables
#######################################################################################

miss.df <- read.dta("vmap.dta")

ggplot(miss.df, aes(x = year, y = vname, fill = pnmiss)) +
  geom_tile(color = "white") +
  scale_fill_gradient(low = "black", high = "grey90", breaks = seq(0, 7, 2), labels = seq(0, 7, 2)) +
  xlab("") +
  ylab("") +
  theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), legend.position = "none") + 
  scale_x_continuous(limits = c(1982, 2022), breaks = c(1984, 1986, 1988, 1990, 1992, 1996, 1998, 2000, 2008, 2012, 2016, 2020)) +
  labs(fill = "Surveys") 


#######################################################################################
#######################################################################################
### Appendix: Robustness checks
#######################################################################################
#######################################################################################

#######################################################################################
### Table 4: Origins of nominal and thematic agenda capacity: political support interaction
#######################################################################################

# Nominal agenda capacity
fits.qc <- with(a.out, glm.nb(quasi_count ~ college + know_perc
                              + interest + attention + watch_news + discuss
                              + inpartisan*approve
                              + entropy + entropy_st 
                              + male + race_black + race_ai + race_ap + age + income_quartile
                              + year_1986 + year_1988 + year_1990 + year_1992 + year_1996 + year_1998 + year_2000 + year_2008 + year_2012 + year_2016 + year_2020))

# Simulate n coefficients for each of the m MI datasets
s.qc <- misim(fits.qc, n = 200)
summary(s.qc$coefs)

# Summarize the coefficients
beta.r1 <- data.frame(matrix(vector(), 0, 7, dimnames = list(c(), c('Variable', 'b', 'lo90', 'hi90', 'lo95', 'hi95', 'model'))), stringsAsFactors = FALSE)

for (i in 1:31) {
  qc.1 <- quantile(s.qc$sim.coefs[,i], probs = c(0.5, 0.05, 0.95, 0.025, 0.975))
  
  beta.r1[i,1] <- colnames(s.qc$coefs)[i]
  beta.r1[i,2:6] <- qc.1[1:5]
}


# Thematic agenda capacity
fits.sc <- with(a.out, glm.nb(differ_s ~ college + know_perc
                              + interest + attention + watch_news + discuss
                              + inpartisan*approve
                              + entropy + entropy_st 
                              + male + race_black + race_ai + race_ap + age + income_quartile 
                              + year_1986 + year_1988 + year_1990 + year_1992 + year_1996 + year_1998 + year_2000 + year_2008 + year_2012 + year_2016 + year_2020))

# Simulate n coefficients for each of the m MI datasets
s.sc <- misim(fits.sc, n = 200)
summary(s.sc$coefs)

# Summarize the coefficients
beta.r2 <- data.frame(matrix(vector(), 0, 7, dimnames = list(c(), c('Variable', 'b', 'lo90', 'hi90', 'lo95', 'hi95', 'model'))), stringsAsFactors = FALSE)

for (i in 1:31) {
  sc.1 <- quantile(s.sc$sim.coefs[,i], probs = c(0.5, 0.05, 0.95, 0.025, 0.975))
  
  beta.r2[i,1] <- colnames(s.sc$coefs)[i]
  beta.r2[i,2:6] <- sc.1[1:5]
}

beta.r1[7] <- 'Nominal'
beta.r2[7] <- 'Thematic'
beta <- rbind(beta.r1, beta.r2)
beta

#######################################################################################
### Table 5: Origins of nominal and thematic agenda capacity: political engagement scale
#######################################################################################

z.data <- read.dta("PAC.dta")
z.data <- fastDummies::dummy_cols(z.data, select_columns = "year")
z.data2 <- z.data[, c('studyid', 'year', 'word_count', 'differ_s', 'differ_g', 'quasi_count', 'approve', 'ideology', 'know_perc', 'age', 'male', 'white', 'married', 'unemployed', 'class', 'income_quartile', 'college', 'inpartisan', 'outpartisan', 'interest_politics', 'attention_campaign', 'entropy', 'entropy_st', 'race_black', 'race_ai', 'race_ap', 'year_1984', 'year_1986', 'year_1988', 'year_1990', 'year_1992', 'year_1996', 'year_1998', 'year_2000', 'year_2008', 'year_2012', 'year_2016', 'year_2020', 'engage')]

# Set the seed
set.seed(12345)

# First, set up some bounds for the count variables: word count (3), differ_s (4), differ_g (5), and quasi_count (6)
bds <- matrix(c(3, 4, 5, 6, 0, 0, 0, 1, 810, 26, 20, 46), nrow = 4, ncol = 3)
bds

a.out <- amelia(z.data2, m = 5, idvars = c('year', 'year_1984', 'year_1986', 'year_1988', 'year_1990', 'year_1992', 'year_1996', 'year_1998', 'year_2000', 'year_2008', 'year_2012', 'year_2016', 'year_2020'), 
                cs = 'studyid',
                noms = c('approve', 'inpartisan', 'outpartisan', 'ideology', 'male', 'white', 'race_black', 'race_ai', 'race_ap', 'married', 'unemployed', 'class', 'income_quartile'), 
                ords = c('differ_s', 'differ_g', 'quasi_count'),
                bounds = bds, max.resample = 1000)

# Nominal agenda diversity
fits.qc2 <- with(a.out, glm.nb(quasi_count ~ engage
                              + college + know_perc + income_quartile
                              + race_black + race_ai + race_ap +
                              + age + male
                              + approve + inpartisan
                              + entropy + entropy_st 
                              + year_1986 + year_1988 + year_1990 + year_1992 + year_1996 + year_1998 + year_2000 + year_2008 + year_2012 + year_2016 + year_2020))

# Simulate n coefficients for each of the m MI datasets
s.qc2 <- misim(fits.qc2, n = 200)
summary(s.qc2$coefs)

# Summarize the coefficients
beta.r1 <- data.frame(matrix(vector(), 0, 7, dimnames = list(c(), c('Variable', 'b', 'lo90', 'hi90', 'lo95', 'hi95', 'model'))), stringsAsFactors = FALSE)

for (i in 1:27) {
  qc.r1 <- quantile(s.qc2$sim.coefs[,i], probs = c(0.5, 0.05, 0.95, 0.025, 0.975))
  
  beta.r1[i,1] <- colnames(s.qc2$coefs)[i]
  beta.r1[i,2:6] <- qc.r1[1:5]
}

# Political Engagement
exp.qc <- sim_setx(s.qc2, x = data.frame(engage = 0), x1 = data.frame(engage = 1), verbose = FALSE)

d.m1 <- data.frame(matrix(vector(), 0, 7, dimnames = list(c(), c('Variable', 'delta', 'lo90', 'hi90', 'lo95', 'hi95', 'model'))), stringsAsFactors = FALSE)

i <- 1
d.m1[i,1] <- 'engagement scale'

ci.90 <- summary(exp.qc[3], level = 0.90)
ci.95 <- summary(exp.qc[3], level = 0.95)
d.m1[i,2:4] <- ci.90[1:3]
d.m1[i,5:6] <- ci.95[2:3]


# Thematic agenda diversity
fits.sc2 <- with(a.out, glm.nb(differ_s ~ engage
                               + college + know_perc + income_quartile
                               + race_black + race_ai + race_ap +
                                 + age + male
                               + approve + inpartisan
                               + entropy + entropy_st 
                               + year_1986 + year_1988 + year_1990 + year_1992 + year_1996 + year_1998 + year_2000 + year_2008 + year_2012 + year_2016 + year_2020))

# Simulate n coefficients for each of the m MI datasets
s.sc2 <- misim(fits.sc2, n = 200)
summary(s.sc2$coefs)

# Summarize the coefficients
beta.r2 <- data.frame(matrix(vector(), 0, 7, dimnames = list(c(), c('Variable', 'b', 'lo90', 'hi90', 'lo95', 'hi95', 'model'))), stringsAsFactors = FALSE)

for (i in 1:27) {
  sc.r2 <- quantile(s.sc2$sim.coefs[,i], probs = c(0.5, 0.05, 0.95, 0.025, 0.975))
  
  beta.r2[i,1] <- colnames(s.sc2$coefs)[i]
  beta.r2[i,2:6] <- sc.r2[1:5]
}

# Political Engagement
exp.sc <- sim_setx(s.sc2, x = data.frame(engage = 0), x1 = data.frame(engage = 1), verbose = FALSE)

d.m2 <- data.frame(matrix(vector(), 0, 7, dimnames = list(c(), c('Variable', 'delta', 'lo90', 'hi90', 'lo95', 'hi95', 'model'))), stringsAsFactors = FALSE)

i <- 1
d.m2[i,1] <- 'engagement scale'

ci.90 <- summary(exp.sc[3], level = 0.90)
ci.95 <- summary(exp.sc[3], level = 0.95)
d.m2[i,2:4] <- ci.90[1:3]
d.m2[i,5:6] <- ci.95[2:3]

cat("Political Engagement produces change of", round(d.m1$delta, 3), "in Nominal Agenda Capacity \n")
cat("Political Engagement produces change of", round(d.m2$delta, 3), "in Thematic Agenda Capacity \n")



#######################################################################################
### Table 6: Origins of thematic agenda capacity based on specific and general categories of the MIPD
#######################################################################################

z.data <- z.data[, c('studyid', 'year', 'word_count', 'differ_s', 'differ_g', 'quasi_count', 'approve', 'ideology', 'know_perc', 'age', 'male', 'white', 'married', 'unemployed', 'class', 'income_quartile', 'college', 'inpartisan', 'outpartisan', 'interest_politics', 'attention_campaign', 'watch_news', 'discuss', 'entropy', 'entropy_st', 'interest', 'attention', 'race_black', 'race_ai', 'race_ap', 'year_1984', 'year_1986', 'year_1988', 'year_1990', 'year_1992', 'year_1996', 'year_1998', 'year_2000', 'year_2008', 'year_2012', 'year_2016', 'year_2020')]

# Set the seed
set.seed(12345)

# First, set up some bounds for the count variables: word count (3), differ_s (4), differ_g (5), and quasi_count (6)
bds <- matrix(c(3, 4, 5, 6, 0, 0, 0, 1, 810, 26, 20, 46), nrow = 4, ncol = 3)
bds

a.out <- amelia(z.data, m = 5, idvars = c('year', 'year_1984', 'year_1986', 'year_1988', 'year_1990', 'year_1992', 'year_1996', 'year_1998', 'year_2000', 'year_2008', 'year_2012', 'year_2016', 'year_2020'), 
                cs = 'studyid',
                noms = c('approve', 'inpartisan', 'outpartisan', 'ideology', 'male', 'white', 'race_black', 'race_ai', 'race_ap', 'married', 'unemployed', 'class', 'income_quartile', 'discuss'), 
                ords = c('differ_s', 'differ_g', 'quasi_count', 'watch_news', 'interest', 'attention'),
                bounds = bds, max.resample = 1000)

# Thematic agenda capacity (specific)
fits.sc <- with(a.out, glm.nb(differ_s ~ interest + attention + discuss + watch_news
                              + college + know_perc + income_quartile
                              + race_black + race_ai + race_ap 
                              + age + male
                              + approve + inpartisan
                              + entropy + entropy_st 
                              + year_1986 + year_1988 + year_1990 + year_1992 + year_1996 + year_1998 + year_2000 + year_2008 + year_2012 + year_2016 + year_2020))

# Simulate n coefficients for each of the m MI datasets
s.sc <- misim(fits.sc, n = 200)
summary(s.sc$coefs)

# Summarize the coefficients
beta.s <- data.frame(matrix(vector(), 0, 6, dimnames = list(c(), c('Variable', 'b', 'lo90', 'hi90', 'lo95', 'hi95'))), stringsAsFactors = FALSE)

for (i in 1:30) {
  sc.1 <- quantile(s.sc$sim.coefs[,i], probs = c(0.5, 0.05, 0.95, 0.025, 0.975))
  
  beta.s[i,1] <- colnames(s.sc$coefs)[i]
  beta.s[i,2:6] <- sc.1[1:5]
}

# Regression output
beta.s

# Thematic agenda capacity (general)
fits.gc <- with(a.out, glm.nb(differ_g ~ interest + attention + discuss + watch_news
                              + college + know_perc + income_quartile
                              + race_black + race_ai + race_ap 
                              + age + male
                              + approve + inpartisan
                              + entropy + entropy_st 
                              + year_1986 + year_1988 + year_1990 + year_1992 + year_1996 + year_1998 + year_2000 + year_2008 + year_2012 + year_2016 + year_2020))

# Simulate n coefficients for each of the m MI datasets
s.gc <- misim(fits.gc, n = 200)
summary(s.gc$coefs)

# Summarize the coefficients
beta.g <- data.frame(matrix(vector(), 0, 6, dimnames = list(c(), c('Variable', 'b', 'lo90', 'hi90', 'lo95', 'hi95'))), stringsAsFactors = FALSE)

for (i in 1:30) {
  gc.1 <- quantile(s.gc$sim.coefs[,i], probs = c(0.5, 0.05, 0.95, 0.025, 0.975))
  
  beta.g[i,1] <- colnames(s.gc$coefs)[i]
  beta.g[i,2:6] <- gc.1[1:5]
}

# Regression output
beta.g


# Correlation coefficient based on the five imputed datasets
correlations <- numeric(5)
for (i in 1:5) {
  data_i <- a.out$imputations[[i]]
  correlations[i] <- cor(data_i$differ_s, data_i$differ_g, method = "pearson")
}
correlations



#######################################################################################
### Figure 3: Coefficients (and 90% confidence intervals) for separate negative binomial regressions for each election year
#######################################################################################

z.data <- read.dta("PAC.dta")
z.data <- fastDummies::dummy_cols(z.data, select_columns = "year")
z.data2 <- z.data[, c('studyid', 'year', 'word_count', 'differ_s', 'differ_g', 'quasi_count', 'approve', 'ideology', 'know_perc', 'age', 'male', 'white', 'married', 'unemployed', 'class', 'income_quartile', 'college', 'inpartisan', 'outpartisan', 'interest_politics', 'attention_campaign', 'entropy', 'entropy_st', 'race_black', 'race_ai', 'race_ap', 'year_1984', 'year_1986', 'year_1988', 'year_1990', 'year_1992', 'year_1996', 'year_1998', 'year_2000', 'year_2008', 'year_2012', 'year_2016', 'year_2020', 'engage')]

# Set the seed
set.seed(12345)

# First, set up some bounds for the count variables: word count (3), differ_s (4), differ_g (5), and quasi_count (6)
bds <- matrix(c(3, 4, 5, 6, 0, 0, 0, 1, 810, 26, 20, 46), nrow = 4, ncol = 3)
bds

a.out <- amelia(z.data2, m = 5, idvars = c('year', 'year_1984', 'year_1986', 'year_1988', 'year_1990', 'year_1992', 'year_1996', 'year_1998', 'year_2000', 'year_2008', 'year_2012', 'year_2016', 'year_2020'), 
                cs = 'studyid',
                noms = c('approve', 'inpartisan', 'outpartisan', 'ideology', 'male', 'white', 'race_black', 'race_ai', 'race_ap', 'married', 'unemployed', 'class', 'income_quartile'), 
                ords = c('differ_s', 'differ_g', 'quasi_count'),
                bounds = bds, max.resample = 1000)

# Loop across these years
years <- c(1984, 1986, 1988, 1990, 1992, 1996, 2000, 2008, 2012, 2016, 2020)

# Function to pool results using Rubin's Rules
pool_rubins_results <- function(models_list, year_val) {
  M <- length(models_list)
  param_names <- names(coef(models_list[[1]]))
  
  # Extract coefficients and standard errors
  coefs <- sapply(models_list, function(m) coef(m))
  ses <- sapply(models_list, function(m) sqrt(diag(vcov(m))))
  
  rownames(coefs) <- rownames(ses) <- param_names
  
  # Pool each term
  pooled <- lapply(1:nrow(coefs), function(i) {
    Q_m <- coefs[i, ]
    U_m <- ses[i, ]^2
    
    Q_bar <- mean(Q_m)
    U_bar <- mean(U_m)
    B <- var(Q_m)
    T_var <- U_bar + (1 + 1/M) * B
    SE <- sqrt(T_var)
    
    # Degrees of freedom using Rubin's approximation
    df <- (M - 1) * (1 + U_bar / ((1 + 1/M) * B))^2
    
    # t-values for confidence intervals
    t_90 <- qt(0.95, df)
    t_95 <- qt(0.975, df)
    
    data.frame(
      variable = param_names[i],
      estimate = Q_bar,
      std.error = SE,
      ci_90_lower = Q_bar - t_90 * SE,
      ci_90_upper = Q_bar + t_90 * SE,
      ci_95_lower = Q_bar - t_95 * SE,
      ci_95_upper = Q_bar + t_95 * SE,
      year = year_val
    )
  })
  
  do.call(rbind, pooled)
}

# Initialize empty list to collect results for each year
all_results <- list()

# Loop over each year
for (yr in years) {
  # Estimate model for each imputed dataset in that year
  model_list <- lapply(1:5, function(i) {
    glm.nb(differ_s ~ engage
           + college + know_perc + income_quartile
           + race_black + race_ai + race_ap
           + approve + inpartisan 
           + age + male,
           data = subset(a.out$imputations[[i]], year == yr))
  })
  
  # Pool the results and store
  pooled <- pool_rubins_results(model_list, yr)
  all_results[[as.character(yr)]] <- pooled
}

# Combine all years into a single data frame
final_results <- bind_rows(all_results)

# View the final results
print(final_results)

### Coefficient plots
# Reverse the order of years so that the most recent is at the bottom
final_results$year <- factor(final_results$year, levels = sort(unique(final_results$year), decreasing = TRUE))

# Ensure variable is character so labeller can match
final_results$variable <- as.character(final_results$variable)
var_labels <- c(
  engage = "Engagement",
  college = "College",
  know_perc = "Knowledge",
  "income_quartileSecond 25%" = "2nd Quartile",
  "income_quartileThird 25%" = "3rd Quartile",  
  "income_quartileHighest 25%" = "4th Quartile",    
  "race_blackBlack" = "Black",
  "race_aiNative American" = "American Indian",
  "race_apAsian/PI" = "Asian/PI",
  "approveApprove" = "Approve",
  inpartisan = "In-Partisan",
  age = "Age",
  "maleMale" = "Male"
)

# Plot
ggplot(final_results, aes(x = estimate, y = year)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +
  geom_point(size = 2) +
  geom_errorbarh(aes(xmin = ci_90_lower, xmax = ci_90_upper), height = 0.2) +
  facet_wrap(~ variable, scales = "free_x", labeller = labeller(variable = var_labels)) +
  theme_minimal() +
  labs(x = "Coefficient", y = "Year") +
  theme(
    strip.text = element_text(face = "bold"),
    axis.text.y = element_text(size = 8),
    panel.grid.minor = element_blank()
  )
